*-------------------------------------------------------------------------------
*						Data Pre-Process
*-------------------------------------------------------------------------------

** Project directories (absolute Windows paths; edit these when relocating the project)
global Raw_data   "G:\project-finished\Descriptive\Data"
global App_data   "G:\project-finished\Descriptive\Appendix Data"
global Class_data "G:\project-finished\Descriptive\Classification"
global Work_lab   "G:\project-finished\Descriptive\Lab"
global Out_lab    "G:\project-finished\Descriptive\Out"

* Every file name given without a directory below resolves against this folder
cd "$Work_lab"

* Close a log left open by an earlier run (no error if none is open), then start a new one
capture log close
log using "$Out_lab\Pre-country", replace
* Turn off the more-prompt so the do-file runs without pausing
set more off


**------------------------------------------------------------------------------
* Step1: Generate Data
* /*CHN consistent ADJ occ*/
**------------------------------------------------------------------------------
* Read the CHN/ACS occupation workbook; allstring imports every column as text
import excel "$App_data\process-CHN_ACSSOC2010_v4.xlsx", sheet("Sheet1") firstrow allstring clear
drop 中美职业统一代码1n则用中国的n1则用美国的nn则自 countchn countusa NOTE

* Remove every space character from the code and title columns
foreach col of varlist consistent complement title_consistent occsoc {
    replace `col' = subinstr(`col', " ", "", .)
}

* Discard rows in which all five identifying columns are blank
drop if consistent == ""                  ///
      & complement == ""                  ///
      & title_consistent == ""            ///
      & occsoc == ""                      ///
      & miscellaneouslifephysicalan == ""
* Discard rows whose 最终调整过后的 value is blank or the string "0"
drop if inlist(最终调整过后的, "", "0")

keep consistent complement title_consistent 最终调整过后的
duplicates drop

* Free the name "consistent" before merging; the variable of that name kept
* afterwards presumably comes from the using dataset - TODO confirm
rename consistent consis_sem

* 1:m requires title_consistent to be unique in memory at this point
merge 1:m title_consistent using CHN-consistent_ONETSOC2010

* NOTE(review): rows from every merge outcome are retained here (there is no
* filter on the merge indicator) - confirm that this is intended
keep title_consistent consistent 最终调整过后的
duplicates drop
compress

order consistent, first
generate version = "soc2010"
sort consistent
save CHN-SOC-consistent, replace

**------------------------------------------------------------------------------
* Step1: Generate Data
	/* USA and SOC2010 */
**------------------------------------------------------------------------------
* Build the intermediate list of ACS occupation codes from Sheet2 (column A holds the code)
import excel "$App_data\ACS\ACS-SOC2010.xlsx", sheet("Sheet2") firstrow allstring clear
rename A occsoc
drop if occsoc == ""
compress
* Leading 5- and 4-character prefixes of the code; they are stored in the
* intermediate file but not read again anywhere in this script
generate occsoc5dig = substr(occsoc, 1, 5)
generate occsoc4dig = substr(occsoc, 1, 4)
save ACS-SOC2010.dta, replace


* Read the CHN/ACS occupation workbook again, this time for the USA side
import excel "$App_data\process-CHN_ACSSOC2010_v4.xlsx", sheet("Sheet1") firstrow allstring clear
drop 中美职业统一代码1n则用中国的n1则用美国的nn则自 countchn countusa NOTE

* Remove every space character from the code and title columns
foreach col of varlist consistent complement title_consistent occsoc {
    replace `col' = subinstr(`col', " ", "", .)
}

* Discard rows in which all five identifying columns are blank
drop if consistent == ""                  ///
      & complement == ""                  ///
      & title_consistent == ""            ///
      & occsoc == ""                      ///
      & miscellaneouslifephysicalan == ""
* Discard rows whose 最终调整过后的 value is blank or the string "0"
drop if inlist(最终调整过后的, "", "0")

keep occsoc miscellaneouslifephysicalan 最终调整过后的
duplicates drop
drop if occsoc == ""

* 1:m requires occsoc to be unique in memory at this point
merge 1:m occsoc using ACS-SOC2010.dta

* NOTE(review): unlike the SOC2000 and SOC2015 sections, no filter on the merge
* indicator is applied here, so unmatched rows are retained - confirm intended
keep occsoc 最终调整过后的
duplicates drop
generate version = "soc2010"
sort occsoc
compress
save USA-SOC-consistent2010, replace

**------------------------------------------------------------------------------
* Step1: Generate Data
	/* USA and SOC2000 */
**------------------------------------------------------------------------------

* SOC2010 to SOC2000 codebook: column A becomes occsoc, column B becomes occsoc2000
import excel "$App_data\ACS\ACS-SOC2010.xlsx", sheet("Sheet3") firstrow allstring clear
rename A occsoc
rename B occsoc2000
save "ACS-SOC-2000_LINK_2010.dta", replace

* USA consistent codes keyed by SOC2010
import excel "$App_data\process-CHN_ACSSOC2010_v4.xlsx", sheet("Sheet1") firstrow allstring clear
drop 中美职业统一代码1n则用中国的n1则用美国的nn则自 countchn countusa NOTE

* Remove every space character from the code and title columns
foreach var in consistent complement title_consistent occsoc {
    replace `var' = subinstr(`var', " ", "", .)
}

* Discard rows in which all five identifying columns are blank, and rows whose
* 最终调整过后的 value is blank or the string "0"
drop if consistent=="" & complement=="" & title_consistent=="" & occsoc=="" & miscellaneouslifephysicalan==""
drop if 最终调整过后的=="" | 最终调整过后的=="0"

keep occsoc miscellaneouslifephysicalan 最终调整过后的
duplicates drop
drop if occsoc==""

* Keep only codes that also appear in the ACS SOC2010 list built earlier.
* The merge indicator is written out in full so the filter does not depend on
* variable-name abbreviation being enabled.
merge 1:m occsoc using ACS-SOC2010.dta
keep if _merge==3
keep occsoc 最终调整过后的
duplicates drop

* Attach the SOC2000 code through the SOC2010 to SOC2000 codebook
merge 1:m occsoc using "ACS-SOC-2000_LINK_2010.dta"

* occsoc (the SOC2010 code) is dropped here; only occsoc2000 remains
keep 最终调整过后的 occsoc2000

* temp = number of rows sharing the same 最终调整过后的 value (blank values form one group)
gen num=1
bys 最终调整过后的: egen temp=count(num)
* Where a value has several rows, discard those without a SOC2000 code
drop if temp>1 & occsoc2000==""
* Diagnostic only: how many rows still lack a SOC2000 code
count if occsoc2000==""
drop if occsoc2000=="" & 最终调整过后的==""

* temp2 = number of rows sharing the same SOC2000 code. The grouping variable is
* spelled out as occsoc2000: the original wrote occsoc, which no longer exists
* at this point and only resolved through variable-name abbreviation.
bys occsoc2000: egen temp2=count(num)
* Where a SOC2000 code has several rows, discard those with a blank 最终调整过后的
drop if temp2>1 & 最终调整过后的==""

keep 最终调整过后的 occsoc2000
duplicates drop
compress
sort occsoc2000
gen version="soc2000"
rename occsoc2000 occsoc
drop if occsoc==""
* unique is a community-contributed command (ssc install unique)
unique occsoc
save USA-SOC-consistent2000, replace

**------------------------------------------------------------------------------
* Step1: Generate Data
	/* USA and SOC2015 */
*	SOC2015 is the same as SOC2010 here; the only difference is that some codes are summed up
**------------------------------------------------------------------------------
* SOC2010 to SOC2015 codebook: column A becomes occsoc, column B becomes occsoc2015
import excel "$App_data\ACS\ACS-SOC2010.xlsx", sheet("Sheet4") firstrow allstring clear
rename A occsoc
rename B occsoc2015
save "ACS-SOC-2015_LINK_2010.dta", replace

* USA consistent codes keyed by SOC2010
import excel "$App_data\process-CHN_ACSSOC2010_v4.xlsx", sheet("Sheet1") firstrow allstring clear
drop 中美职业统一代码1n则用中国的n1则用美国的nn则自 countchn countusa NOTE

* Remove every space character from the code and title columns
foreach var in consistent complement title_consistent occsoc {
    replace `var' = subinstr(`var', " ", "", .)
}

* Discard rows in which all five identifying columns are blank, and rows whose
* 最终调整过后的 value is blank or the string "0"
drop if consistent=="" & complement=="" & title_consistent=="" & occsoc=="" & miscellaneouslifephysicalan==""
drop if 最终调整过后的=="" | 最终调整过后的=="0"

keep occsoc miscellaneouslifephysicalan 最终调整过后的
duplicates drop
drop if occsoc==""

* Keep only codes that also appear in the ACS SOC2010 list built earlier.
* The merge indicator is written out in full so the filter does not depend on
* variable-name abbreviation being enabled.
merge 1:m occsoc using ACS-SOC2010.dta
keep if _merge==3
keep occsoc 最终调整过后的
duplicates drop

* Attach the SOC2015 code through the SOC2010 to SOC2015 codebook
merge 1:m occsoc using "ACS-SOC-2015_LINK_2010.dta"

* occsoc (the SOC2010 code) is dropped here; only occsoc2015 remains
keep 最终调整过后的 occsoc2015

* temp = number of rows sharing the same non-blank 最终调整过后的 value.
* Rows with a blank value get a missing temp; Stata treats missing as larger
* than any number, so temp>1 is also true for those rows in the drop below.
gen num=1
bys 最终调整过后的: egen temp=count(num) if 最终调整过后的!=""
drop if temp>1 & occsoc2015==""
* Diagnostic only: how many rows still lack a SOC2015 code
count if occsoc2015==""
drop if occsoc2015=="" & 最终调整过后的==""

* temp2 = number of rows sharing the same SOC2015 code. The grouping variable is
* spelled out as occsoc2015: the original wrote occsoc, which no longer exists
* at this point and only resolved through variable-name abbreviation.
bys occsoc2015: egen temp2=count(num)
* Where a SOC2015 code has several rows, discard those with a blank 最终调整过后的
drop if temp2>1 & 最终调整过后的==""

keep 最终调整过后的 occsoc2015
duplicates drop

compress
sort occsoc2015
gen version="soc2015"
rename occsoc2015 occsoc
* NOTE(review): the SOC2000 section drops rows with a blank occsoc before
* saving; this section does not - confirm whether blank codes can reach here
* unique is a community-contributed command (ssc install unique)
unique occsoc
save USA-SOC-consistent2015, replace


* Delete the intermediate datasets created above
foreach scratch in ACS-SOC2010.dta ACS-SOC-2000_LINK_2010.dta ACS-SOC-2015_LINK_2010.dta {
    erase `scratch'
}
**------------------------------------------------------------------------------
/*							OUTPUT FILES
CHN-SOC-consistent.dta
USA-SOC-consistent2010.dta
USA-SOC-consistent2000.dta
USA-SOC-consistent2015.dta
*/
**------------------------------------------------------------------------------

* Stop logging
log close
